#include "bcharset.h"
#include <vector>
#include <locale>	// std::wstring_convert
#include <codecvt>	// std::codecvt_utf8

using namespace butils;

// Encodes a byte buffer as an upper-case hexadecimal string.
//
// Each input byte yields exactly two characters taken from
// "0123456789ABCDEF", high nibble first, with no separators.
//
// data: first byte to encode; may be nullptr.
// len:  number of bytes to read from data.
//
// Returns a string of 2 * len hex digits. Returns an empty string when data
// is nullptr or len is 0.
std::string charset::bytes2hexs(const char* data, size_t len)
{
    // Static table: avoids building a heap-allocated string on every call.
    static const char digits[] = "0123456789ABCDEF";

    std::string res;
    if (data == nullptr || len == 0)
        return res;

    res.reserve(len * 2);
    for (size_t i = 0; i < len; ++i)
    {
        // Go through unsigned char so bytes >= 0x80 are never sign-extended.
        const unsigned char byte = static_cast<unsigned char>(data[i]);
        res.push_back(digits[byte >> 4]);
        res.push_back(digits[byte & 0x0F]);
    }
    return res;
}

// Decodes hexadecimal text into raw bytes.
//
// Hex digits (0-9, A-F, a-f) are consumed in pairs, high nibble first; every
// other character (spaces, dashes, ...) is skipped. A trailing unpaired digit
// is dropped.
//
// data: text to decode; nullptr yields an empty result.
// len:  number of characters to examine. When len is 0 the input is treated
//       as NUL-terminated and scanned up to the terminator.
//
// Returns the decoded bytes.
std::string charset::hexs2bytes(const char* data, size_t len)
{
    std::string res;
    if (data == nullptr)
        return res;

    const bool bounded = (len != 0);
    unsigned char high = 0;     // high nibble waiting for its partner
    bool have_high = false;

    // The bound is tested BEFORE each read so that nothing at or beyond
    // data[len] is ever touched, even when it happens to be a hex digit.
    for (size_t i = 0; bounded ? (i < len) : (data[i] != '\0'); ++i)
    {
        const char c = data[i];
        unsigned char nibble = 0;
        if (c >= '0' && c <= '9')
            nibble = static_cast<unsigned char>(c - '0');
        else if (c >= 'A' && c <= 'F')
            nibble = static_cast<unsigned char>(c - 'A' + 10);
        else if (c >= 'a' && c <= 'f')
            nibble = static_cast<unsigned char>(c - 'a' + 10);
        else
            continue;   // not a hex digit: ignore it

        if (have_high)
            res.push_back(static_cast<char>(high | nibble));
        else
            high = static_cast<unsigned char>(nibble << 4);
        have_high = !have_high;
    }
    return res;
}

// Convenience overload: runs the pointer/length UTF-8 check over the bytes
// held by str. The length is narrowed to int before being forwarded.
bool charset::is_utf8(const std::string& str)
{
    const char* const bytes = str.c_str();
    const int count = static_cast<int>(str.length());
    return is_utf8(bytes, count);
}

// Checks whether a byte buffer has the structure of UTF-8 text.
//
// Only the lead-byte / continuation-byte layout is examined, using the
// legacy 1-6 byte scheme:
//   0x00-0x7F  single byte (ASCII)
//   0xC0-0xDF  lead of a 2-byte sequence
//   0xE0-0xEF  lead of a 3-byte sequence
//   0xF0-0xF7  lead of a 4-byte sequence
//   0xF8-0xFB  lead of a 5-byte sequence
//   0xFC-0xFD  lead of a 6-byte sequence
// Every byte after a lead must look like 10xxxxxx. 0xFE and 0xFF are never
// valid and are rejected. Overlong forms and code-point ranges are NOT
// checked.
//
// data: buffer to inspect.
// len:  number of bytes in data.
//
// Returns true when data is nullptr, len <= 0, or every sequence in the
// buffer is structurally complete; false otherwise (including a sequence
// that is cut off at the end of the buffer).
bool charset::is_utf8(const char* data, int len)
{
    if (data == nullptr || len <= 0)
        return true;

    unsigned int pending = 0;   // continuation bytes still owed by the current sequence
    for (int i = 0; i < len; ++i) {
        const unsigned char chr = static_cast<unsigned char>(data[i]);

        if (pending != 0) {
            // Inside a multi-byte sequence: the byte must be 10xxxxxx.
            if ((chr & 0xC0) != 0x80) {
                return false;
            }
            --pending;
            continue;
        }

        if (chr < 0x80) {
            continue;           // plain ASCII
        }

        // Lead byte: work out how many continuation bytes must follow.
        if (chr >= 0xFE) {
            return false;       // 0xFE / 0xFF never occur in UTF-8
        }
        else if (chr >= 0xFC) {
            pending = 5;
        }
        else if (chr >= 0xF8) {
            pending = 4;
        }
        else if (chr >= 0xF0) {
            pending = 3;
        }
        else if (chr >= 0xE0) {
            pending = 2;
        }
        else if (chr >= 0xC0) {
            pending = 1;
        }
        else {
            return false;       // stray continuation byte (0x80-0xBF)
        }
    }

    // A non-zero count means the last sequence was truncated.
    return pending == 0;
}

// Converts a string in the locale's multibyte ("ANSI") encoding to UTF-8.
//
// The input is first widened to wchar_t with the codecvt facet of the
// selected locale (the user default locale under MSVC, "zh_CN.GB18030"
// elsewhere) and then re-encoded as UTF-8 with std::codecvt_utf8<wchar_t>.
//
// a_str: bytes in the locale's multibyte encoding.
//
// Returns the UTF-8 text, or "" when the widening step does not report
// codecvt_base::ok.
// NOTE: exceptions raised by the std::locale constructor or by
// wstring_convert::to_bytes are not caught here.
const std::string charset::ans_to_utf8(const std::string& a_str)
{
    typedef std::codecvt<wchar_t, char, mbstate_t> ansi_codecvt;

    // One wchar_t per input byte is the size the widening step is given.
    std::vector<wchar_t> wide(a_str.size());
#ifdef _MSC_VER
    std::locale ansi_locale("");//"zh-CN"
#else
    std::locale ansi_locale("zh_CN.GB18030");
#endif
    const ansi_codecvt& converter = std::use_facet<ansi_codecvt>(ansi_locale);

    const char* const src_begin = a_str.data();
    const char* const src_end = src_begin + a_str.size();
    wchar_t* const dst_begin = wide.data();
    wchar_t* const dst_end = dst_begin + wide.size();

    mbstate_t shift_state = {};
    const char* src_stop = nullptr;
    wchar_t* dst_stop = nullptr;
    const int status = converter.in(shift_state,
        src_begin, src_end, src_stop,
        dst_begin, dst_end, dst_stop);

    if (std::codecvt_base::ok != status)
    {
        return "";
    }

    // dst_stop marks the end of the wide characters actually produced.
    std::wstring_convert<std::codecvt_utf8<wchar_t>> utf8_codec;
    return utf8_codec.to_bytes(std::wstring(wide.data(), dst_stop));
}

// Converts UTF-8 text to the locale's multibyte ("ANSI") encoding.
//
// The input is decoded to wchar_t with std::codecvt_utf8<wchar_t> and then
// narrowed with the codecvt facet of the selected locale ("zh-CN" under
// MSVC, "zh_CN.GB18030" elsewhere).
//
// u_str: UTF-8 encoded text.
//
// Returns the multibyte text, or "" when the narrowing step does not report
// codecvt_base::ok (for example, a character the target encoding cannot
// represent).
// NOTE: exceptions raised by wstring_convert::from_bytes (malformed UTF-8)
// or by the std::locale constructor are not caught here.
const std::string charset::utf8_to_ans(const std::string& u_str)
{
    typedef std::codecvt<wchar_t, char, mbstate_t> ansi_codecvt;

    std::wstring_convert<std::codecvt_utf8<wchar_t>> cutf8;
    const std::wstring wide = cutf8.from_bytes(u_str);
#ifdef _MSC_VER
    std::locale loc("zh-CN");
#else
    std::locale loc("zh_CN.GB18030");
#endif
    const ansi_codecvt& facet = std::use_facet<ansi_codecvt>(loc);

    // GB18030 needs up to 4 bytes for one code point, so 2 bytes per wchar_t
    // is too small and makes out() stop with `partial`. Size the buffer for
    // the worst case: 4 bytes, or more if the facet reports a larger maximum.
    const int reported = facet.max_length();
    const size_t bytes_per_wchar = reported > 4 ? static_cast<size_t>(reported) : 4;
    std::vector<char> buff(wide.size() * bytes_per_wchar);

    const wchar_t* ws_next = nullptr;
    char* sz_next = nullptr;
    mbstate_t state = {};
    const int res = facet.out(state,
        wide.data(), wide.data() + wide.size(), ws_next,
        buff.data(), buff.data() + buff.size(), sz_next);

    if (std::codecvt_base::ok == res)
    {
        // sz_next marks the end of the bytes actually written.
        return std::string(buff.data(), sz_next);
    }
    return "";
}

// Returns a_str unchanged when is_utf8() accepts it; otherwise returns the
// result of ans_to_utf8(a_str).
const std::string charset::try_ans_to_utf8(const std::string& a_str)
{
    if (is_utf8(a_str))
    {
        return a_str;
    }
    return ans_to_utf8(a_str);
}